**************************************************************************************************
*** This do file creates the replication datasets for 											*/
*** TITLE																					 	*/
*** AUTHOR																						*/
*** AUTHOR																						*/
*** AUTHOR																						*/
*** Published in: ***																			*/
**************************************************************************************************


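/* File description:
- NOTE(review): this header originally named the file "dataset_preparation_CAWI.do" and described
  downloading and recoding the original datasets; the code below does not do that, so confirm the
  actual file name
- INPUT: "merged_dataset_ID.dta" in the folder "${recoded}" (data/processed/)
- OUTPUT: "panel.dta" (long format, one row per id and wave) and "panel_descr.dta" (wide format,
  for descriptive statistics), both saved in "${recoded}"
- it drops the wave 4 and 5 variables, drops observations with a missing interview date in
  waves 1, 2 or 3, and reshapes the remaining data from wide to long
*/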


**************************************************************************************************


/* Before proceeding:
1) Execute "dataset_preparation_CATI.do"
2) Execute "dataset_preparation_CAWI.do"
*/

**************************************************************************************************
* Session setup: disable -more- pauses in the output and empty the data in memory
set more off
clear

* Project root. This is a machine-specific absolute path and it must end with "/",
* because the folder globals below are built by appending directly to it.
global current "C:/Users/andre/Dropbox/Ricerca/PAPER/2016 Text Analysis Italian TV/03_Analysis/"
cd "${current}"

* Data folders, defined relative to the project root
global recoded "${current}data/processed/"
global original "${current}data/external/"

************************************
******* CREATE PANEL DATASET  ******
************************************
* Build the long (id x wave) panel from the merged wide dataset and save it as panel.dta.
use "${recoded}merged_dataset_ID.dta", clear

* The TV questions were asked only in waves 1, 2 and 3, so remove all wave 4 and 5 variables
drop *_w4 *_w5

* Keep only observations with a non-missing interview date in each of waves 1, 2 and 3
keep if interview_date_w1!=. & interview_date_w2!=. & interview_date_w3!=.

* Wide -> long. Each stub ends in "_", so the text after it (e.g. "w1") becomes the
* string identifier stored in -wave-.
* NOTE(review): stub tg_ is also a prefix of the tg_freq_* variables; with the -string-
* option confirm that -wave- ends up containing only the expected wave codes.
reshape long ///
	interview_date_ tg_ tg_freq_ ///
	ptv_pd_ ptv_pdl_ ptv_ln_ ///
	vote_pdl_ vote_gov_ ///
	trust_mag_ best_mip_ best_immig_ ///
	berl_ ///
	pid_pd_ pid_pdl_ ///
	polint_ lr_ lrdist_pdl_ lrdist_pd_ ///
	retsoc_ retsoc01_ ///
	gov_ vintent_, ///
	i(id) j(wave) string

* Strip the trailing underscore left on every reshaped variable name
rename *_ *

* Replace the string wave identifier with a labelled numeric variable of the same name
encode wave, gen(wave_num)
drop wave
rename wave_num wave

* Put the data in id-wave order, add one indicator variable per wave (prefix w),
* and number the rows in that order
sort id wave
tabulate wave, generate(w)
generate id_wave = _n

* Display the number of observations in the log
count

* Move the identifiers to the front and declare the panel structure
order id_wave wave id
xtset id wave

save "${recoded}panel.dta", replace


***** Creating wide version for descriptive statistics:
* Reloads the merged wide dataset, applies the wave and interview-date restrictions,
* keeps the variables used for descriptive statistics and saves panel_descr.dta.
use "${recoded}merged_dataset_ID.dta", clear

* TV variables are only asked in waves 1, 2, and 3:
drop *_w4 *_w5

* Drop observations with a missing interview date in any of waves 1, 2 or 3:
drop if interview_date_w1==.
drop if interview_date_w2==.
drop if interview_date_w3==.

* Keep the respondent characteristics together with the wave-specific variables.
* age, sex, educ and polinfo have to be listed here: -order- below refers to them and
* stops with a "variable not found" error if -keep- has already removed them.
keep age sex educ polinfo ptv_pdl_* tg_* tg_freq_* gov_* berl_* lr_* lrdist_pdl_*
order age sex educ polinfo tg_* tg_freq_* ptv_pdl_*

save "${recoded}panel_descr.dta", replace
